*********************************************************************************
*                                                                               *
*                Election Frequency - Preparing the Dataset     	            *
*                                                                               *
*                        Import from Excel										*
*							   CEE DATA                                 			*
*                                                                               *
*********************************************************************************

* Root folder of the replication package (absolute path on the author's machine)
global beg_path `"C:\Users\JohnSmith\Dropbox\Replication_EF\"'

* Switch to the folder that holds the collected raw files
cd `"${beg_path}Data\Analysis 1\Collection\"'

* Load sheet Data of the CEE workbook: first row supplies the variable names,
* every column is read as a string, and any data in memory is discarded
import excel using "CEE_electiondata.xlsx", sheet("Data") firstrow allstring clear
		 
/* Destring: convert the listed string columns to numeric in place.
   force turns any non-numeric entry into missing instead of stopping. */
destring Year Length Round Type El_Type Share_Elect Turnout Non_Dem First Second Elsystem, replace force

/* Build a Stata daily date from the string Date (day-month-year order).
   2019 is the top year used to resolve two-digit years: a two-digit year is
   read as the latest matching year that does not exceed 2019. */
gen date = date(Date, "DMY", 2019)
* Display the value as a calendar date instead of a raw day count
format date %td
order date, after(Date)

* Shorter variable names
rename Non_Dem Nondem
rename Share_Elect Coverage_pop

*** Derive Type and Round from the detailed El_Type code ***
* Rows whose El_Type matches none of the codes below keep their current Type
replace Type = 1 if El_Type == 5  | El_Type == 6
replace Type = 2 if El_Type == 1  | El_Type == 2
replace Type = 3 if El_Type == 3  | El_Type == 4
replace Type = 4 if El_Type == 7
replace Type = 5 if El_Type == 12
replace Type = 6 if El_Type == 10 | El_Type == 11
replace Type = 7 if El_Type == 8  | El_Type == 9
replace Type = 8 if El_Type == 13 | El_Type == 14

* Round is 2 for El_Type codes 2, 4, 6, 9, 11 and 14, and 1 for every other row
replace Round = cond(inlist(El_Type, 2, 4, 6, 9, 11, 14), 2, 1)

* Flag rows whose population coverage is missing in the imported data.
* The flag is created before the estimates below are filled in, so a row that
* was missing and then receives an estimate stays flagged.
gen missing_info_cov = 1 if Coverage_pop == .
label variable missing_info_cov "Observations where population coverage is missing or was estimated"

* Hand-entered coverage estimates for two El_Type 14 elections (Lithuania 1990, Bulgaria 2011)
* NOTE(review): the values are fractions (0.5, 0.61) while Coverage_pop is labelled as a percentage later in this file - confirm the unit
replace Coverage_pop = 0.5 if El_Type == 14 & Year == 1990 & Country == "Lithuania"
replace Coverage_pop = 0.61 if El_Type == 14 & Year == 2011 & Country == "Bulgaria"

* Country: replace the string variable by a labelled numeric code.
* Codes start at 21 because, per the original note, Western countries use 1 to 12.
* The value label is defined first and handed to encode, so each country gets
* its code by name rather than by its alphabetical position among the names
* that happen to be in the data.
label define Country 21 "Bulgaria" 22 "Czech Republic" 23  "Estonia" 24  "Hungary" 25  "Latvia" 26  "Lithuania" 27  "Poland" 28  "Romania" 29  "Slovakia" 30 "Slovenia"
* noextend: stop with an error if a country name is not in the label above,
* instead of silently assigning it a new code
encode Country, generate(temp1) label(Country) noextend
drop Country
rename temp1 Country
* Frequency table as a visual check (fre is a community-contributed command: ssc install fre)
fre Country

/* Variable labels */
label variable date "Stata Date"
label variable Length "Number of days"
label variable Round "1st/2nd round"
label variable Coverage_pop "Population coverage in %"
label variable Nondem "Elections Character"

* Value labels for Type (any existing definition of the label is overwritten)
label define Type 1 "Presidential" 2 "Lowerhouse" 3 "Upperhouse" 4 "Referendum" 5 "European" 6 "Regional" 7 "Local" 8 "Municipal" 10 "Others", replace
label values Type Type

* Value labels for Nondem
label define Nondem 0 "Dem" 1 "Nondem"
label values Nondem Nondem

* Write the prepared dataset, overwriting any earlier version
save `"${beg_path}Data\Analysis 1\electionsdata_CEE.dta"', replace



